Here, we’re just setting a few options.

knitr::opts_chunk$set(
  warning = TRUE, # show warnings during codebook generation
  message = TRUE, # show messages during codebook generation
  error = TRUE, # do not interrupt codebook generation in case of errors,
                # usually better for debugging
  echo = TRUE  # show R code
)
ggplot2::theme_set(ggplot2::theme_bw())
pander::panderOptions("table.split.table", Inf)

Now, we’re preparing our data for the codebook.

library(codebook)
library(here)
## here() starts at C:/Users/eirin/Documents/MPI Psycholinguistics/Teaching/IMPRSConf2020
library(readr)
codebook_data <- read_csv(here("raw-data",  "2019.csv"))
## Parsed with column specification:
## cols(
##   `Overall rank` = col_double(),
##   `Country or region` = col_character(),
##   Score = col_double(),
##   `GDP per capita` = col_double(),
##   `Social support` = col_double(),
##   `Healthy life expectancy` = col_double(),
##   `Freedom to make life choices` = col_double(),
##   Generosity = col_double(),
##   `Perceptions of corruption` = col_double()
## )
### I have added a name to this dataset as metadata
metadata(codebook_data)$name <- "World Happiness reports data"

### practice!
### add temporal coverage metadata to this codebook
### take a look at the vignette if you're not sure how


# omit the following lines, if your missing values are already properly labelled
codebook_data <- detect_missing(codebook_data,
    only_labelled = TRUE, # only labelled values are autodetected as missing
    negative_values_are_missing = FALSE, # negative values are missing values
    ninety_nine_problems = TRUE,   # 99/999 are missing values, if they are more than 5 MAD from the median
    )

# The following line finds item aggregates with names like this:
# scale = scale_1 + scale_2R + scale_3R
# identifying these aggregates allows the codebook function to
# automatically compute reliabilities.
# However, it will not reverse items automatically.
codebook_data <- detect_scales(codebook_data)

Create codebook

codebook(codebook_data)
## No missing values.
knitr::asis_output(data_info)

Metadata

Description

if (exists("name", meta)) {
  glue::glue(
    "__Dataset name__: {name}",
    .envir = meta)
}

Dataset name: World Happiness reports data

cat(description)

The dataset has N=156 rows and 9 columns. 156 rows have no missing values on any column.

Metadata for search engines

  • Date published: 2020-06-01
meta <- meta[setdiff(names(meta),
                     c("creator", "datePublished", "identifier",
                       "url", "citation", "spatialCoverage", 
                       "temporalCoverage", "description", "name"))]
pander::pander(meta)
  • keywords: Overall rank, Country or region, Score, GDP per capita, Social support, Healthy life expectancy, Freedom to make life choices, Generosity and Perceptions of corruption
knitr::asis_output(survey_overview)

Variables

if (detailed_variables || detailed_scales) {
  knitr::asis_output(paste0(scales_items, sep = "\n\n\n", collapse = "\n\n\n"))
}

Overall rank

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  !is.null(item_info) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
  
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
      new_height <- old_height
    }
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
  if (is.character(item_nomiss)) {
      char_count <- stringr::str_count(item_nomiss)
      attributes(char_count)$label <- item_label
      plot_labelled(char_count, 
                    item_name, wrap_at, FALSE, trans = "log1p", "characters")
  } else {
      cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
  }
}

knitr::opts_chunk$set(fig.height = old_height)

0 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name data_type n_missing complete_rate min median max mean sd hist label
Overall rank numeric 0 1 1 78 156 78.5 45.17743 ▇▇▇▇▇ NA
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}

Country or region

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  !is.null(item_info) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
  
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
      new_height <- old_height
    }
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
  if (is.character(item_nomiss)) {
      char_count <- stringr::str_count(item_nomiss)
      attributes(char_count)$label <- item_label
      plot_labelled(char_count, 
                    item_name, wrap_at, FALSE, trans = "log1p", "characters")
  } else {
      cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
  }
}

knitr::opts_chunk$set(fig.height = old_height)

0 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name data_type n_missing complete_rate n_unique empty min max whitespace label
Country or region character 0 1 156 0 4 24 0 NA
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}

Score

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  !is.null(item_info) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
  
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
      new_height <- old_height
    }
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
  if (is.character(item_nomiss)) {
      char_count <- stringr::str_count(item_nomiss)
      attributes(char_count)$label <- item_label
      plot_labelled(char_count, 
                    item_name, wrap_at, FALSE, trans = "log1p", "characters")
  } else {
      cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
  }
}

knitr::opts_chunk$set(fig.height = old_height)

0 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name data_type n_missing complete_rate min median max mean sd hist label
Score numeric 0 1 2.9 5.4 7.8 5.407096 1.11312 ▂▇▇▇▃ NA
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}

GDP per capita

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  !is.null(item_info) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
  
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
      new_height <- old_height
    }
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
  if (is.character(item_nomiss)) {
      char_count <- stringr::str_count(item_nomiss)
      attributes(char_count)$label <- item_label
      plot_labelled(char_count, 
                    item_name, wrap_at, FALSE, trans = "log1p", "characters")
  } else {
      cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
  }
}

knitr::opts_chunk$set(fig.height = old_height)

0 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name data_type n_missing complete_rate min median max mean sd hist label
GDP per capita numeric 0 1 0 0.96 1.7 0.9051474 0.3983895 ▃▅▇▇▃ NA
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}

Social support

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  !is.null(item_info) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
  
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
      new_height <- old_height
    }
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
  if (is.character(item_nomiss)) {
      char_count <- stringr::str_count(item_nomiss)
      attributes(char_count)$label <- item_label
      plot_labelled(char_count, 
                    item_name, wrap_at, FALSE, trans = "log1p", "characters")
  } else {
      cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
  }
}

knitr::opts_chunk$set(fig.height = old_height)

0 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name data_type n_missing complete_rate min median max mean sd hist label
Social support numeric 0 1 0 1.3 1.6 1.208814 0.2991914 ▁▁▂▆▇ NA
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}

Healthy life expectancy

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  !is.null(item_info) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
  
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
      new_height <- old_height
    }
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
  if (is.character(item_nomiss)) {
      char_count <- stringr::str_count(item_nomiss)
      attributes(char_count)$label <- item_label
      plot_labelled(char_count, 
                    item_name, wrap_at, FALSE, trans = "log1p", "characters")
  } else {
      cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
  }
}

knitr::opts_chunk$set(fig.height = old_height)

0 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name data_type n_missing complete_rate min median max mean sd hist label
Healthy life expectancy numeric 0 1 0 0.79 1.1 0.7252436 0.242124 ▁▃▃▇▅ NA
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}

Freedom to make life choices

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  !is.null(item_info) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
  
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
      new_height <- old_height
    }
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
  if (is.character(item_nomiss)) {
      char_count <- stringr::str_count(item_nomiss)
      attributes(char_count)$label <- item_label
      plot_labelled(char_count, 
                    item_name, wrap_at, FALSE, trans = "log1p", "characters")
  } else {
      cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
  }
}

knitr::opts_chunk$set(fig.height = old_height)

0 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name data_type n_missing complete_rate min median max mean sd hist label
Freedom to make life choices numeric 0 1 0 0.42 0.63 0.3925705 0.1432895 ▁▃▆▇▆ NA
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}

Generosity

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  !is.null(item_info) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
  
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
      new_height <- old_height
    }
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
  if (is.character(item_nomiss)) {
      char_count <- stringr::str_count(item_nomiss)
      attributes(char_count)$label <- item_label
      plot_labelled(char_count, 
                    item_name, wrap_at, FALSE, trans = "log1p", "characters")
  } else {
      cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
  }
}

knitr::opts_chunk$set(fig.height = old_height)

0 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name data_type n_missing complete_rate min median max mean sd hist label
Generosity numeric 0 1 0 0.18 0.57 0.1848462 0.0952544 ▆▇▆▁▁ NA
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}

Perceptions of corruption

Distribution

show_missing_values <- FALSE
if (has_labels(item)) {
  missing_values <- item[is.na(haven::zap_missing(item))]
  attributes(missing_values) <- attributes(item)
  if (!is.null(attributes(item)$labels)) {
    attributes(missing_values)$labels <- attributes(missing_values)$labels[is.na(attributes(missing_values)$labels)]
    attributes(item)$labels <- attributes(item)$labels[!is.na(attributes(item)$labels)]
  }
  if (is.double(item)) {
    show_missing_values <- length(unique(haven::na_tag(missing_values))) > 1
    item <- haven::zap_missing(item)
  }
  if (length(item_attributes$labels) == 0 && is.numeric(item)) {
    item <- haven::zap_labels(item)
  }
}
item_nomiss <- item[!is.na(item)]

# unnest mc_multiple and so on
if (
  is.character(item_nomiss) &&
  any(stringr::str_detect(item_nomiss, stringr::fixed(", "))) &&
  !is.null(item_info) &&
  (exists("type", item_info) && 
    any(stringr::str_detect(item_info$type, 
                            pattern = stringr::fixed("multiple"))))
  ) {
  item_nomiss <- unlist(stringr::str_split(item_nomiss, pattern = stringr::fixed(", ")))
}
attributes(item_nomiss) <- attributes(item)

old_height <- knitr::opts_chunk$get("fig.height")
non_missing_choices <- item_attributes[["labels"]]
many_labels <- length(non_missing_choices) > 7
go_vertical <- !is_numeric_or_time_var(item_nomiss) || many_labels
  
if ( go_vertical ) {
  # numeric items are plotted horizontally (because that's what usually expected)
  # categorical items are plotted vertically because we can use the screen real estate better this way

    if (is.null(choices) || 
        dplyr::n_distinct(item_nomiss) > length(non_missing_choices)) {
        non_missing_choices <- unique(item_nomiss)
        names(non_missing_choices) <- non_missing_choices
    }
  choice_multiplier <- old_height/6.5
    new_height <- 2 + choice_multiplier * length(non_missing_choices)
    new_height <- ifelse(new_height > 20, 20, new_height)
    new_height <- ifelse(new_height < 1, 1, new_height)
    if(could_disclose_unique_values(item_nomiss) && is.character(item_nomiss)) {
      new_height <- old_height
    }
    knitr::opts_chunk$set(fig.height = new_height)
}

wrap_at <- knitr::opts_chunk$get("fig.width") * 10
# todo: if there are free-text choices mingled in with the pre-defined ones, don't show
# todo: show rare items if they are pre-defined
# todo: bin rare responses into "other category"
if (!length(item_nomiss)) {
  cat("No non-missing values to show.")
} else if (!could_disclose_unique_values(item_nomiss)) {
  plot_labelled(item_nomiss, item_name, wrap_at, go_vertical)
} else {
  if (is.character(item_nomiss)) {
      char_count <- stringr::str_count(item_nomiss)
      attributes(char_count)$label <- item_label
      plot_labelled(char_count, 
                    item_name, wrap_at, FALSE, trans = "log1p", "characters")
  } else {
      cat(dplyr::n_distinct(item_nomiss), " unique, categorical values, so not shown.")
  }
}

knitr::opts_chunk$set(fig.height = old_height)

0 missing values.

Summary statistics

attributes(item) <- item_attributes
df = data.frame(item, stringsAsFactors = FALSE)
names(df) = html_item_name
escaped_table(codebook_table(df))
name data_type n_missing complete_rate min median max mean sd hist label
Perceptions of corruption numeric 0 1 0 0.085 0.45 0.1106026 0.0945378 ▇▅▁▁▁ NA
if (show_missing_values) {
  plot_labelled(missing_values, item_name, wrap_at)
}
if (!is.null(item_info)) {
  # don't show choices again, if they're basically same thing as value labels
  if (!is.null(choices) && !is.null(item_info$choices) && 
    all(names(na.omit(choices)) == item_info$choices) &&
    all(na.omit(choices) == names(item_info$choices))) {
    item_info$choices <- NULL
  }
  item_info$label_parsed <- 
    item_info$choice_list <- item_info$study_id <- item_info$id <- NULL
  pander::pander(item_info)
}
if (!is.null(choices) && length(choices) && length(choices) < 30) {
    pander::pander(as.list(choices))
}
missingness_report

Missingness report

if (length(md_pattern)) {
  if (knitr::is_html_output()) {
    rmarkdown::paged_table(md_pattern, options = list(rows.print = 10))
  } else {
    knitr::kable(md_pattern)
  }
}
items

Codebook table

export_table(metadata_table)
jsonld

JSON-LD metadata The following JSON-LD can be found by search engines, if you share this codebook publicly on the web.

{
  "name": "World Happiness reports data",
  "datePublished": "2020-06-01",
  "description": "The dataset has N=156 rows and 9 columns.\n156 rows have no missing values on any column.\n\n\n## Table of variables\nThis table contains variable names, labels, and number of missing values.\nSee the complete codebook for more.\n\n|name                         |label | n_missing|\n|:----------------------------|:-----|---------:|\n|Overall rank                 |NA    |         0|\n|Country or region            |NA    |         0|\n|Score                        |NA    |         0|\n|GDP per capita               |NA    |         0|\n|Social support               |NA    |         0|\n|Healthy life expectancy      |NA    |         0|\n|Freedom to make life choices |NA    |         0|\n|Generosity                   |NA    |         0|\n|Perceptions of corruption    |NA    |         0|\n\n### Note\nThis dataset was automatically described using the [codebook R package](https://rubenarslan.github.io/codebook/) (version 0.8.2).",
  "keywords": ["Overall rank", "Country or region", "Score", "GDP per capita", "Social support", "Healthy life expectancy", "Freedom to make life choices", "Generosity", "Perceptions of corruption"],
  "@context": "http://schema.org/",
  "@type": "Dataset",
  "variableMeasured": [
    {
      "name": "Overall rank",
      "@type": "propertyValue"
    },
    {
      "name": "Country or region",
      "@type": "propertyValue"
    },
    {
      "name": "Score",
      "@type": "propertyValue"
    },
    {
      "name": "GDP per capita",
      "@type": "propertyValue"
    },
    {
      "name": "Social support",
      "@type": "propertyValue"
    },
    {
      "name": "Healthy life expectancy",
      "@type": "propertyValue"
    },
    {
      "name": "Freedom to make life choices",
      "@type": "propertyValue"
    },
    {
      "name": "Generosity",
      "@type": "propertyValue"
    },
    {
      "name": "Perceptions of corruption",
      "@type": "propertyValue"
    }
  ]
}`